% Obesity and diabetes rates in the United States between 1995
% and 2015.
% First import the data file.
% Here the imported file was saved as 'temp_obes_diab_2.mat'.

clear all

% Location of jar (for TE calculation).
% This call is made BEFORE any data or parameters are created: MATLAB
% documents that changing the dynamic Java class path issues `clear java`,
% which can remove workspace/global variables. Calling it first keeps the
% variables defined below safe regardless of that side effect.
javaaddpath('...\infodynamics-dist-1.2.1/infodynamics.jar')

% Load the saved dataset and flatten it into a plain numeric matrix
% (struct -> table -> array). The semicolon suppresses dumping the loaded
% struct to the console.
temp = load('...\temp_obes_diab_2.mat');
temp = struct2table(temp);
%temp=lungca_smoking;
temp = table2array(temp);

s  = 50;  % number of states
tl = 21;  % length of time series per state
v  = 100; % number of reshufflings of the dataset with preservation of the temporal order of the data per state

% NOTE(review): `te` and `r` are not read anywhere in the visible part of
% this script (`r` is overwritten later by corrcoef); they are kept only in
% case a later section relies on them.
te = zeros(s,1);
r  = zeros(s,2);



% ------------------------------------------------------------------------
% Reshuffle loop.
% For each of the v repetitions:
%   1. draw a random order of the s states,
%   2. rebuild the dataset with the states in that order while keeping the
%      tl consecutive measurements of every state together and in order,
%   3. z-score the obesity (column 5) and diabetes (column 6) series,
%   4. estimate transfer entropy obesity -> diabetes (JIDT Kraskov
%      estimator) together with its surrogate-based significance,
%   5. compute the lag-0 correlation between the two series.
% Results are stored per repetition in te3:
%   col 1 TE, col 2 surrogate mean, col 3 surrogate std, col 4 TE p-value,
%   col 5 correlation coefficient, col 6 correlation p-value.
% ------------------------------------------------------------------------

% Loop-invariant pieces (previously rebuilt element by element on every pass).
nRows = s*tl;
b  = reshape(repmat(1:s, tl, 1), [], 1); % vector of unique numeric state identifiers (each repeated tl times)
a2 = horzcat(b, temp);                   % original dataset with the state identifier as first column
withinState = transpose(1:tl);           % row offsets 1..tl inside one state's block

% Preallocate the outputs instead of growing them inside the loop.
te3   = zeros(v, 6);
temp2 = zeros(nRows, size(a2,2), v);     % every reshuffled dataset, one slice per repetition

for i0 = 1:v
    % Shuffle the states, but preserve the unique sequence of measurements per state.
    p = transpose(randperm(s));

    % Block k of the reshuffled dataset receives the tl rows of state p(k):
    % column k of the bsxfun result holds (p(k)-1)*tl + (1:tl)'.
    rowIdx = reshape(bsxfun(@plus, withinState, (transpose(p) - 1)*tl), [], 1);
    a4 = a2(rowIdx, :);
    temp2(:,:,i0) = a4;

    o2 = a4(:,5); % all US obesity prevalence data of all states in one column
    t2 = a4(:,6); % all US diabetes prevalence data of all states in one column

    % Standardise both series (zero mean, unit standard deviation).
    o2 = (o2 - mean(o2))/std(o2);
    t2 = (t2 - mean(t2))/std(t2);

    % OPTIONAL: add jitter (same source/destination direction as below)
    % sourceArray=jitter(o2);
    % destArray =jitter(t2);

    sourceArray = o2;
    destArray   = t2;

    % NOTE(review): setObservations receives the concatenation of all states
    % as a single series, so the step from the last sample of one state to the
    % first sample of the next is treated like an ordinary time step. JIDT
    % also offers startAddObservations/addObservations/finaliseAddObservations
    % for separate trials; left unchanged here because switching would alter
    % the reported numbers -- confirm which behaviour is intended.

    % Create a TE calculator and run it (Kraskov estimator):
    teCalc = javaObject('infodynamics.measures.continuous.kraskov.TransferEntropyCalculatorKraskov');
    teCalc.initialise(1); % Use history length 1 (Schreiber k=1)
    teCalc.setProperty('k', '4'); % Use Kraskov parameter K=4 for 4 nearest points
    % Perform calculation with correlated source:
    teCalc.setObservations(sourceArray, destArray);
    result = teCalc.computeAverageLocalOfObservations();

    % Significance against 1000 surrogates.
    cs = teCalc.computeSignificance(1000);
    average = getMeanOfDistribution(cs);
    sd = getStdOfDistribution(cs);
    tscore = getTSscore(cs); % Assuming the distribution is Gaussian, return a t-score for our observed measurement (not stored in te3)
    pvalue = cs.pValue;

    % Explicit progress line (replaces the unsuppressed `pvalue` echo).
    fprintf('reshuffle %d/%d: TE p-value = %g\n', i0, v, pvalue);

    te3(i0,1) = result;
    te3(i0,2) = average;
    te3(i0,3) = sd;
    te3(i0,4) = pvalue;

    %%% Calculate correlation
    % Dedicated names so the permutation vector p is not clobbered.
    [corrR, corrP] = corrcoef(sourceArray, destArray);
    te3(i0,5) = corrR(1,2); % Cross-correlation at lag 0
    te3(i0,6) = corrP(1,2); % p-value of the cross-correlation at lag 0
end
  
    

% Summarise the per-reshuffle results held in te3 with bias-corrected and
% accelerated (BCa) bootstrap intervals, 1000 resamples each. The statistic
% bootstrapped is always the sample mean; for every quantity the mean of the
% bootstrap replicates is displayed (lines deliberately left without ';').
nBoot = 1000;
mean_bootstrap = @(sample) mean(sample);

%%% Transfer entropy (te3 column 1) and its p-value (te3 column 4)
[bootstat_mean_ci, bootstat_mean] = bootci(nBoot, ...
    {mean_bootstrap, te3(:,1)}, 'type', 'bca');
average_bootstrapped_meanTE = mean(bootstat_mean)

[bootstat_pvalue_ci, bootstat_pvalue] = bootci(nBoot, ...
    {mean_bootstrap, te3(:,4)}, 'type', 'bca');
average_bootstrapped_pvalueTE = mean(bootstat_pvalue)

%%% Cross-correlation at lag 0 (te3 column 5) and its p-value (te3 column 6)
% NOTE: the variable name "meanCrorr" is kept as in the original so that the
% displayed/workspace name does not change.
[bootstat_meanCorr_ci, bootstat_meanCorr] = bootci(nBoot, ...
    {mean_bootstrap, te3(:,5)}, 'type', 'bca');
average_bootstrapped_meanCrorr = mean(bootstat_meanCorr)

[bootstat_pvalueCorr_ci, bootstat_pvalueCorr] = bootci(nBoot, ...
    {mean_bootstrap, te3(:,6)}, 'type', 'bca');
average_bootstrapped_pvalueCorr = mean(bootstat_pvalueCorr)
